Plotting the variables for ensemble runs¶

Notes:

  • enabled reading data on Baseline

Tips Baseline¶

  1. Use pyces env
In [ ]:
import os,glob
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import socket
#yimport pyreadr # to read .rds files

Some Useful functions¶

In [ ]:
def print_dict_tree(d, indent=0):
    """
    Print the tree of the dictionary
    """
    for key, value in d.items():
        print(' ' * indent + str(key))
        if isinstance(value, dict):
            print_dict_tree(value, indent + 4)

Check which machine you are on! The path names will vary accordingly¶

In [ ]:
machine_name = socket.gethostname()
if "baseline" in machine_name:
    print("We are on: baseline")

elif "or-slurm" in machine_name:
    print ("We are on: cades")

elif "MAC132004" in machine_name:
    print("We are on: macbook")
else:
    print("other than baseline, cades, or LAB macbook")
We are on: baseline

Copy all the fates_parameter files from ad_spinup run dir to following path. Use rsync_params.sh!

In [ ]:
# Paths
if "baseline" in machine_name:
    path_params_head = "/gpfs/wolf2/cades/cli185/scratch/ud4/FATES_Outputs/runs/UQ/params/"
    path_processed_head = "/gpfs/wolf2/cades/cli185/scratch/ud4/FATES_Outputs/runs/"
    path_ensemble_vals = "/ccsopen/home/ud4/models/OLMT/data/lnd/clm2/paramdata/ensemble/"

if "MAC132004" in machine_name:
    path_params_head = "/Users/ud4/FATESMDS_analysis/outputs/runs/tests_alp/2024/UQ/params/"
    path_processed_head = "/Users/ud4/FATESMDS_analysis/outputs/runs/tests_alp/2024/"
In [ ]:
N_ensembles=216

params_list = ["fates_cnp_vmax_nh4","fates_cnp_vmax_no3","fates_cnp_vmax_p"]
In [ ]:
# Path to save results

path_save = "/gpfs/wolf2/cades/cli185/scratch/ud4/Results/EN_Vmax/"
In [ ]:
fnames={} # processed data
fnames_params={} # processed data
#FACE_1PFT_AllomBlVmax_r240424_woL2FR_CONew_enVmax_ECA_processed
case_id = "FACE_1PFT_AllomBlVmax_r240424_woL2FR_CONew_enVmax"
case_id = "FACE_1PFT_AllomBlVmax_r240424_woL2FR_CONew_enVmax_RD"
case_id = "FACE_1PFT_AllomBlVmax_r240424_woL2FR_CONew_enVmax_ECA"
case_id = "FACE_1PFT_AllomBlVmax_r240424_wL2FR_CONew_enVmax"
case_id = "FACE_1PFT_AllomBlVmax_r240424_wL2FR_CONew_enVmax_RD"
case_id = "FACE_1PFT_AllomBlVmax_r240424_wL2FR_CONew_enVmax_ECA"
case_id = "FACE_1PFT_AllomBl_r240424_woL2FRf_CONew_enVmax_ECA"
#case_id = "FACE_1PFT_AllomBl_r240424_woL2FRf_CONew_enVmax_RD"
#case_id = "FACE_1PFT_AllomBlVmax_r240424_woL2FRfull_CONew_enVmax"
sites= ("US-ORN", "US-DUK")
#sites= ("US-ORN",)#, "US-DUK")
for site in sites:
    fnames_params[site] = {}
    for idx in range(int(N_ensembles)):
        # C-Only
        i= idx+1
        fnames[f"{case_id}_{site}_spins_g{i:05d}"] = f"{path_processed_head}{case_id}_processed/{case_id}_{site}_spins_g{i:05d}.nc"
        fnames_params[site][f"{case_id}_{site}_spins_g{i:05d}"] = f"{path_params_head}{case_id}_{site}_I1850ELMFATES_ad_spinup/fates_params_{i:05d}.nc"
In [ ]:
fnames_params[site][f"{case_id}_{site}_spins_g{i:05d}"]
Out[ ]:
'/gpfs/wolf2/cades/cli185/scratch/ud4/FATES_Outputs/runs/UQ/params/FACE_1PFT_AllomBl_r240424_woL2FRf_CONew_enVmax_ECA_US-DUK_I1850ELMFATES_ad_spinup/fates_params_00216.nc'

if params error add the case dir in case_params_copy.txt
then run rsync_params.sh

In [ ]:
ds_params = {}
dict_params = {} # to store the params values

for site in sites:
    dict_params[site] = {}
    for parm in params_list:
        dict_params[site][parm] = []

for site in sites:
    for idx, key in enumerate(fnames_params[site].keys()):
        #print (key)
        ds_params[key] = xr.open_mfdataset(fnames_params[site][key], decode_times=False)
        # making list of param values
        for parm in params_list:
            #print (f"{key}: {ds_params[key][parm].values[0]} ")
            dict_params[site][parm].append(ds_params[key][parm].values[0])
In [ ]:
import pandas as pd

# Replace 'file.txt' with the path to your text file
df_params = pd.read_csv(f'{path_ensemble_vals}bs_Vmax2_parm_vals',
                        names= params_list,
                        delim_whitespace=True)
df_params.index = ['g{:05d}'.format(i+1) for i in range(len(df_params))]

# Display the DataFrame
print(df_params)

plt.figure(figsize=(15,5))
df_params.plot(figsize=(15,5))

# Take the logarithm base 10 of the DataFrame values
df_log10 = np.log10(df_params)

# Plot the columns

df_log10.plot(figsize=(15,5),title="g")
plt.ylabel('log10 values')
plt.savefig(f"{path_save}/Params_en.png")
/tmp/ipykernel_1481038/2081599401.py:4: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\s+'`` instead
  df_params = pd.read_csv(f'{path_ensemble_vals}bs_Vmax2_parm_vals',
        fates_cnp_vmax_nh4  fates_cnp_vmax_no3  fates_cnp_vmax_p
g00001        1.000000e-11        1.000000e-11      1.000000e-11
g00002        1.000000e-11        1.000000e-11      1.000000e-10
g00003        1.000000e-11        1.000000e-11      1.000000e-09
g00004        1.000000e-11        1.000000e-11      1.000000e-08
g00005        1.000000e-11        1.000000e-11      1.000000e-07
...                    ...                 ...               ...
g00212        1.000000e-06        1.000000e-06      1.000000e-10
g00213        1.000000e-06        1.000000e-06      1.000000e-09
g00214        1.000000e-06        1.000000e-06      1.000000e-08
g00215        1.000000e-06        1.000000e-06      1.000000e-07
g00216        1.000000e-06        1.000000e-06      1.000000e-06

[216 rows x 3 columns]
<Figure size 1500x500 with 0 Axes>
No description has been provided for this image
No description has been provided for this image
In [ ]:
# Making a dataframe that will store the sum of first 20 years of GPP.
In [ ]:
# extracting the timeseries of variables for selected variables and sites
dict_vars_data = {}
# also add the sum of n years of variable for sensitivity analysis
sum_n_years= 20
dict_param_sum_var = {}


variables = ["FATES_GPP",
             "FATES_NPP"]
for var in variables:
    dict_vars_data[var] = {}
    dict_param_sum_var[var] = {}
    for site in sites:
        dict_vars_data[var][site]={}
        dict_param_sum_var[var] [site]={}
        dict_param_sum_var [var] [site] = df_params.copy(deep=True)
        for key in fnames.keys():
            if site in key: # Storing the ORNL and Duke data separately
                dict_vars_data[var][site][key.split("_")[-1]]= xr.open_dataset (fnames[key], decode_times=False) [var]
                sum_var = dict_vars_data[var][site][key.split("_")[-1]][:sum_n_years].sum().values
                dict_param_sum_var [var] [site] .loc [key.split("_")[-1], f"Sum_{var}"]  =  sum_var
In [ ]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

#var = "FATES_GPP"
#site = "US-ORN"
for var in variables:
    for site in sites:
        df = dict_param_sum_var [var] [site].copy(deep =True)

        # Split dataset into features (parameters) and target variable (output)
        X = df[params_list]  # Features
        y = df[f"Sum_{var}"]  # Target variable

        # Split the data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Fit regression models for each parameter separately
        models = {}
        for param in X.columns:
            # Choose a regression model (e.g., Linear Regression, Random Forest)
            model = LinearRegression()  # Change this to RandomForestRegressor() for Random Forest

            # Fit the model
            model.fit(X_train[[param]], y_train)

            # Store the model
            models[param] = model

        # Evaluate model performance (e.g., R-squared)
        for param, model in models.items():
            score = model.score(X_test[[param]], y_test)
            print(f'R-squared for {param}: {score}')

        # Compare coefficients or feature importances
        for param, model in models.items():
            if isinstance(model, LinearRegression):
                print(f'Coefficient for {param}: {model.coef_}')
            elif isinstance(model, RandomForestRegressor):
                print(f'Feature importance for {param}: {model.feature_importances_}')
        # Assuming X_train and y_train are your training data
        # Train a RandomForestRegressor model
        model = RandomForestRegressor()
        model.fit(X_train, y_train)

        # Get feature importances from the trained model
        feature_importances = model.feature_importances_

        # Create a DataFrame to store feature importances
        importance_df = pd.DataFrame({'Feature': X_train.columns, 'Importance': feature_importances})

        # Sort the DataFrame by importance values
        importance_df = importance_df.sort_values(by='Importance', ascending=False)

        tmp_case_name = "_".join([case_id.split("_")[3],case_id.split("_")[4],case_id.split("_")[-1]])

        # Plot feature importances
        plt.figure(figsize=(10, 6))
        plt.bar(importance_df['Feature'], importance_df['Importance'])
        plt.xlabel('Feature')
        plt.ylabel('Importance')
        plt.title(f'Feature Importance {tmp_case_name}_{var}_{site}')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.savefig(f"{path_save}/Sensivity_{tmp_case_name}_{var}_{site}.png")
        plt.show()
R-squared for fates_cnp_vmax_nh4: -0.4572103484791634
R-squared for fates_cnp_vmax_no3: -4.632943466754831
R-squared for fates_cnp_vmax_p: -0.15653202879656392
Coefficient for fates_cnp_vmax_nh4: [0.0138936]
Coefficient for fates_cnp_vmax_no3: [0.30688976]
Coefficient for fates_cnp_vmax_p: [0.16201549]
No description has been provided for this image
R-squared for fates_cnp_vmax_nh4: -0.21041116874167454
R-squared for fates_cnp_vmax_no3: -0.7464408192417893
R-squared for fates_cnp_vmax_p: -0.03870875673500529
Coefficient for fates_cnp_vmax_nh4: [0.04036886]
Coefficient for fates_cnp_vmax_no3: [0.33672109]
Coefficient for fates_cnp_vmax_p: [0.26355865]
No description has been provided for this image
R-squared for fates_cnp_vmax_nh4: -0.41709236978943864
R-squared for fates_cnp_vmax_no3: -4.310012038056727
R-squared for fates_cnp_vmax_p: -0.13753643076658761
Coefficient for fates_cnp_vmax_nh4: [0.00570502]
Coefficient for fates_cnp_vmax_no3: [0.20276171]
Coefficient for fates_cnp_vmax_p: [0.10791616]
No description has been provided for this image
R-squared for fates_cnp_vmax_nh4: -0.21164497658183423
R-squared for fates_cnp_vmax_no3: -0.7462575674514049
R-squared for fates_cnp_vmax_p: -0.038932800186233685
Coefficient for fates_cnp_vmax_nh4: [0.02810115]
Coefficient for fates_cnp_vmax_no3: [0.23259579]
Coefficient for fates_cnp_vmax_p: [0.18305138]
No description has been provided for this image
In [ ]:
#dict_param_sum_var [var] [site]
#site
case_id
Out[ ]:
'FACE_1PFT_AllomBl_r240424_woL2FRf_CONew_enVmax_ECA'
In [ ]:
#print (params_list)
for var in variables:
    for site in sites:
        df = dict_param_sum_var [var] [site].copy(deep =True)
        marker_styles = ['o', 's', '^', 'D', 'x', '*']
        plt.figure(figsize=(8, 6))
        count = 0
        for p1 in df_params[params_list[0]].unique():
            for p2 in df_params[params_list[1]].unique():
                for idx_p3, p3 in enumerate (df_params[params_list[2]].unique()):
                    subset = df_params[(df_params[params_list[0]] == p1) &
                                        (df_params[params_list[1]] == p2) &
                                        (df_params[params_list[2]] == p3)]
                    plt.plot(dict_vars_data[var][site][subset.index[0]], marker = marker_styles[idx_p3], label=f'{(p1,p2,p3)}', alpha =0.5)
                    count+=1
        plt.title(f'{tmp_case_name}_{var}_{site}')
        plt.xlabel('Index')
        plt.ylabel('Output Variable')
        #plt.legend()
        plt.grid(True)
        plt.savefig(f"{path_save}/TS_all_{tmp_case_name}_{var}_{site}.png")
        plt.show()
        print (count)
No description has been provided for this image
216
No description has been provided for this image
216
No description has been provided for this image
216
No description has been provided for this image
216

print (params_list)¶

for var in variables: for site in sites: df = dict_param_sum_var [var] [site].copy(deep =True) marker_styles = ['o', 's', '^', 'D', 'x', '*'] count = 0 for idx_p1, p1 in enumerate (df_params[params_list[0]].unique()): for idx_p2, p2 in enumerate(df_params[params_list[1]].unique()): for idx_p3, p3 in enumerate (df_params[params_list[2]].unique()): plt.figure(figsize=(8, 6)) subset = df_params[(df_params[params_list[0]] == p1) & (df_params[params_list[1]] == p2) & (df_params[params_list[2]] == p3)] plt.plot(dict_vars_data[var][site][subset.index[0]], marker = marker_styles[idx_p3], label=f'{(p1,p2,p3)}', alpha =0.5) count+=1 plt.title(f'{tmp_case_name}{var}{site}') plt.xlabel('Index') plt.ylabel('Output Variable') plt.legend() plt.ylim(0,1e-7) plt.grid(True) #plt.savefig(f"{path_save}/TS_{tmp_case_name}{var}{site}_{subset.index[0]}.png") #plt.show() print (var, site, count)

In [ ]:
subset.index[0]
Out[ ]:
'g00216'
In [ ]:
 
In [ ]:
print (params_list)
df = dict_param_sum_var [var] [site].copy(deep =True)
marker_styles = ['o', 's', '^', 'D', 'x', '*']
for p1 in df_params[params_list[0]].unique():
    for p2 in df_params[params_list[1]].unique():
        plt.figure(figsize=(8, 6))
        for idx_p3, p3 in enumerate (df_params[params_list[2]].unique()):
            subset = df_params[(df_params[params_list[0]] == p1) &
                                 (df_params[params_list[1]] == p2) &
                                 (df_params[params_list[2]] == p3)]
            plt.plot(dict_vars_data[var][site][subset.index[0]], marker = marker_styles[idx_p3], label=f'{(p1,p2,p3)}', alpha =0.5)
        plt.title(f'Output Variable vs. Parameter1 (Param1={p3})')
        plt.xlabel('Index')
        plt.ylabel('Output Variable')
        plt.legend()
        plt.grid(True)
        plt.show()
['fates_cnp_vmax_nh4', 'fates_cnp_vmax_no3', 'fates_cnp_vmax_p']
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
print (params_list)
df = dict_param_sum_var [var] [site].copy(deep =True)
marker_styles = ['o', 's', '^', 'D', 'x', '*']
for p1 in df_params[params_list[0]].unique():
    for p2 in df_params[params_list[1]].unique():
        plt.figure(figsize=(8, 6))
        for idx_p3, p3 in enumerate (df_params[params_list[2]].unique()):
            subset = df[(df_params[params_list[0]] == p1) &
                                 (df_params[params_list[1]] == p2)]
        plt.plot(subset [params_list[2]], subset [f"Sum_{var}"],label=f'{(p1,p2)}',  alpha =0.5)
        plt.legend()
        plt.yscale('log')
        plt.xscale('log')
        plt.title(f'Output Variable vs. Parameter1 (Param1={p3})')
        plt.xlabel('params_list[2]')
['fates_cnp_vmax_nh4', 'fates_cnp_vmax_no3', 'fates_cnp_vmax_p']
/tmp/ipykernel_1481038/467480283.py:6: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`.
  plt.figure(figsize=(8, 6))
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
for var in variables:
    for site in sites:
        fig, ax = plt.subplots()
        for key in fnames.keys():
            if site in key:
                data = dict_vars_data[var][site][key.split("_")[-1]]
                ax.plot(data, label=key.split("_")[-1])
        ax.set_title(f"{var} at {site}")
        plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
# how to check which variable affects GPP the most?
df_params[params_list[0]].unique()
Out[ ]:
array([1.e-11, 1.e-10, 1.e-09, 1.e-08, 1.e-07, 1.e-06])
In [ ]:
 

print (params_list) for p1 in df_params[params_list[0]].unique(): plt.figure(figsize=(8, 6)) for p2 in df_params[params_list[1]].unique(): for p3 in df_params[params_list[2]].unique(): subset = df_params[(df_params[params_list[0]] == p1) & (df_params[params_list[1]] == p2) & (df_params[params_list[2]] == p3)] plt.plot(dict_vars_data[var][site][subset.index[0]], label=f'Param2={p2}, Param3={p3}', alpha =.2) plt.title(f'Output Variable vs. Parameter1 (Param1={p1})') plt.xlabel('Index') plt.ylabel('Output Variable') plt.legend() plt.grid(True) plt.show()

In [ ]:
print (params_list)
marker_styles = ['o', 's', '^', 'D', 'x', '*']
for p1 in df_params[params_list[0]].unique():
    for p2 in df_params[params_list[1]].unique():
        plt.figure(figsize=(8, 6))
        for idx_p3, p3 in enumerate (df_params[params_list[2]].unique()):
            subset = df_params[(df_params[params_list[0]] == p1) &
                                 (df_params[params_list[1]] == p2) &
                                 (df_params[params_list[2]] == p3)]

            range = dict_vars_data[var][site][subset.index[0]].max() - dict_vars_data[var][site][subset.index[0]].min()
            #if range > 3e-08:
            if True:
                plt.plot(dict_vars_data[var][site][subset.index[0]], marker = marker_styles[idx_p3], label=f'{(p1,p2,p3)}', alpha =0.5)
        plt.title(f'Output Variable vs. Parameter1 (Param1={p3})')
        plt.xlabel('Index')
        plt.ylim(0,1e-7)
        plt.ylabel('Output Variable')
        plt.legend()
        plt.grid(True)
        plt.show()
['fates_cnp_vmax_nh4', 'fates_cnp_vmax_no3', 'fates_cnp_vmax_p']
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: